library(tidyverse)
library(tidytext)
library(DT)
library(scales)
library(wordcloud2)
library(gridExtra)
library(ngram)
library(shiny) 

Step 1 - Load the processed text data along with demographic information on contributors

We use the processed data for our analysis and combine it with the demographic information available.

hm_data <- read_csv("../output/processed_moments.csv")

urlfile<-'https://raw.githubusercontent.com/rit-public/HappyDB/master/happydb/data/demographic.csv'
demo_data <- read_csv(urlfile)

Combine both the data sets and keep the required columns for analysis

hm_data <- hm_data %>%
  inner_join(demo_data, by = "wid") %>%
  select(wid,
         original_hm,
         gender, 
         marital, 
         parenthood,
         reflection_period,
         age, 
         country, 
         ground_truth_category, 
         text) %>%
  mutate(count = sapply(hm_data$text, wordcount)) %>%
  filter(gender %in% c("m", "f")) %>%
  filter(marital %in% c("single", "married")) %>%
  filter(parenthood %in% c("n", "y")) %>%
  filter(reflection_period %in% c("24h", "3m")) %>%
  mutate(reflection_period = fct_recode(reflection_period, 
                                        months_3 = "3m", hours_24 = "24h"))
datatable(hm_data)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Create a bag of words using the text data

bag_of_words <-  hm_data %>%
  unnest_tokens(word, text)

word_count <- bag_of_words %>%
  count(word, sort = TRUE)

Create bigrams using the text data

hm_bigrams <- hm_data %>%
  filter(count != 1) %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2)

bigram_counts <- hm_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  count(word1, word2, sort = TRUE)
ui <- navbarPage("What makes people happy?",
                 tabPanel("Overview",
                          
                          titlePanel(h1("Most Frequent Occurrences",
                                        align = "center")),
                          
                          sidebarLayout(
                            sidebarPanel(
                              sliderInput(inputId = "topWordcloud",
                                          label = "Number of terms for word cloud:",
                                          min = 5,
                                          max = 100,
                                          value = 50),
                              br(),
                              br(),
                              
                              checkboxInput(inputId = "topFreqB",
                                            label = "Plot Bar Chart",
                                            value = F),
                              sliderInput(inputId = "topBarchart",
                                          label = "Number of terms for bar chart:",
                                          min = 1,
                                          max = 25,
                                          value = 10),
                              br(),
                              br(),
                              
                              checkboxInput(inputId = "topFreqN",
                                            label = "Plot Network Graph",
                                            value = F),
                              sliderInput(inputId = "topNetwork",
                                          label = "Number of edges for network graph:",
                                          min = 1,
                                          max = 150,
                                          value = 50)
                            ),
                            
                            mainPanel(
                              wordcloud2Output(outputId = "WC"),
                              plotOutput(outputId = "figure")
                            )
                          )
                 )
)

Develop the server for the R Shiny app

server <- function(input, output, session) {
  
  pt1 <- reactive({
    if(!input$topFreqB) return(NULL)
    word_count %>%
      slice(1:input$topBarchart) %>%
      mutate(word = reorder(word, n)) %>%
      ggplot(aes(word, n)) +
      geom_col() +
      xlab(NULL) +
      ylab("Word Frequency")+
      coord_flip()
  })
  
  pt2 <- reactive({
    if(!input$topFreqN) return(NULL)
    bigram_graph <- bigram_counts %>%
      slice(1:input$topNetwork) %>%
      graph_from_data_frame()
    
    set.seed(123)
    
    x <- grid::arrow(type = "closed", length = unit(.1, "inches"))
    
    ggraph(bigram_graph, layout = "fr") +
      geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                     arrow = x, end_cap = circle(.05, 'inches')) +
      geom_node_point(color = "skyblue", size = 3) +
      geom_node_text(aes(label = name), repel = TRUE) +
      theme_void()
  })
  
  output$WC <- renderWordcloud2({
    
    word_count %>%
      slice(1:input$topWordcloud) %>%
      wordcloud2(size = 0.6,
                 rotateRatio = 0)
    
  })
  
  output$figure <- renderPlot(height = 500, width = 500, {
    
    ptlist <- list(pt1(),pt2())
    ptlist <- ptlist[!sapply(ptlist, is.null)]
    if(length(ptlist)==0) return(NULL)
    
    lay <- rbind(c(1,1),
                 c(2,2))
    
    grid.arrange(grobs = ptlist, layout_matrix = lay)
  })
  
  

  
}

Run the R Shiny app

shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents